library(estimatr)
pica2 <- function(Y, D, A, C, placebo, placebo_level, data, covariates = NULL){
  
  # Free Choice group
  data_d0   <- data[data[, D] == 0, ]
  data_d0$Y <- data_d0[,Y]
  C_list    <- setdiff(sort(unique(data_d0[, C])), placebo_level)
  effect_D0 <- list()
  if(is.null(covariates) == TRUE){
    free_formula <- as.formula(paste0(Y, "~ pl_inv"))
  }else{
    free_formula <- as.formula(paste0(Y, "~ pl_inv +", paste(covariates, collapse = "+")))
  }
  for(i in 1:length(C_list)){
    data_d0_use <- data_d0[data_d0[, C] == C_list[i], ]
    data_d0_use$pl_inv <- 1 - data_d0_use$pl
    lm_use <- lm_robust(free_formula, data = data_d0_use)
    effect_D0[[i]] <- summary(lm_use)$coefficients
  }
  names(effect_D0) <- paste0("C=", C_list)
  
  # Forced Choice group
  data_d1   <- data[data[, D] == 1, ]
  data_d1$Y <- data_d1[,Y]
  data_d1$A <- factor(data_d1[, A], levels = c(placebo_level, C_list)) # use placebo_level as ref
  if(is.null(covariates) == TRUE){
    forced_formula <- as.formula(paste0(Y, "~ A"))
  }else{
    forced_formula <- as.formula(paste0(Y, "~ A +", paste(covariates, collapse = "+")))
  }
  lm_d1 <- lm_robust(forced_formula, data = data_d1)
  lm_d1_print <- lm(forced_formula, data = data_d1)
  # level_use <- levels(factor(data_d1[, A]))
  # data_d1$A <- relevel(factor(data_d1[, A]), ref = level_use[length(level_use)])
  # lm_d2 <- lm_robust(Y ~ A, data = data_d1)
  length_A <- length(unique(data_d1$A))
  ATE   <- summary(lm_d1)$coefficients[c(2:length_A), c(1,2,4,5,6)]
  ACTE_view <- matrix(NA, nrow = length(effect_D0), ncol = 5)
  for(i in 1:length(effect_D0)){
    ACTE_view[i, 1:5] <- effect_D0[[i]][2, c(1,2,4,5,6)]
  }
  rownames(ACTE_view) <- names(effect_D0)
  colnames(ACTE_view) <- colnames(ATE)
  
  # Estimation of ACTE-non-viewer
  ACTE_non_view <- matrix(NA, nrow = length(effect_D0), ncol = 5)
  for(i in 1:length(C_list)){
    prob_not_C <- mean(data[data[,D]==0, C] != C_list[i])
    prob_C <- 1 - prob_not_C
    est_u <- ATE[i, 1]/prob_not_C - ACTE_view[i, 1]*(prob_C/prob_not_C)
    var_u <- ATE[i,2]^2/(prob_not_C)^2 + (ACTE_view[i, 2]^2)*((prob_C/prob_not_C)^2)
    ACTE_non_view[i, 1:5] <- c(est_u, sqrt(var_u), 2*(1 - pnorm(abs(est_u/sqrt(var_u)))),
                               est_u - 1.96*sqrt(var_u), est_u + 1.96*sqrt(var_u))
  }
  rownames(ACTE_non_view) <- names(effect_D0)
  colnames(ACTE_non_view) <- colnames(ATE)
  
  # p-value
  ATE[, 3] <- round(ATE[, 3], 4)
  ACTE_view[, 3] <- round(ACTE_view[, 3], 4)
  ACTE_non_view[, 3] <- round(ACTE_non_view[, 3], 4)
  
  out <- list("ATE" = ATE, "ACTE_view" = ACTE_view, "ACTE_non_view" = ACTE_non_view,
              "effect_D1" = summary(lm_d1)$coefficients,
              "effect_D0" = effect_D0,
              "lm_ate_out" = lm_d1_print)
  return(out)
}

# bootstrap
pica2_boot <- function(Y, D, A, C, placebo, placebo_level, data, covariates = NULL, stratify = NULL, boot_num = 1000, seed = 1234){
  
  data_d1   <- data[data[, D] == 1, ]
  data_d0   <- data[data[, D] == 0, ]
  length_A  <- length(unique(data_d1[, A]))
  length_C  <- length(setdiff(sort(unique(data_d0[, C])), placebo_level))
  
  ATE_boot <- matrix(NA, nrow = (length_A - 1), ncol = boot_num)
  ACTE_view_boot <- matrix(NA, nrow = length_C, ncol = boot_num)
  ACTE_non_view_boot <- matrix(NA, nrow = length_C, ncol = boot_num)
  
  res_boot <- list()
  set.seed(seed)
  for(b in 1:boot_num){
    if(is.null(stratify) == FALSE){
      stratify_list  <- unique(data[, stratify])
      data_base_boot <- list()
      for(i in 1:length(stratify_list)){
        data_base <- data[data[,stratify] == stratify_list[i], , drop = FALSE]
        boot_id   <- sample(x = seq(1:nrow(data_base)), size = nrow(data_base), replace = TRUE)
        data_base_boot[[i]] <- data_base[boot_id, , drop = FALSE]
      }
      data_boot <- do.call("rbind", data_base_boot)
    }else{
      boot_id   <- sample(x = seq(1:nrow(data)), size = nrow(data), replace = TRUE)
      data_boot <- data[boot_id, , drop = FALSE]
    }
    res_boot[[b]] <- pica2(Y = Y, D = D, A = A, C = C, placebo = placebo, placebo_level = placebo_level, data = data_boot)
    ATE_boot[1:(length_A - 1), b] <- res_boot[[b]]$ATE[, 1]
    ACTE_view_boot[1:length_C, b] <- res_boot[[b]]$ACTE_view[, 1]
    ACTE_non_view_boot[1:length_C, b] <- res_boot[[b]]$ACTE_non_view[, 1]
  }
  
  ATE_sum <- cbind(apply(ATE_boot, 1, mean, na.rm = TRUE), apply(ATE_boot, 1, sd, na.rm = TRUE),
                   2*(1 - pnorm(abs(apply(ATE_boot, 1, mean, na.rm = TRUE)/apply(ATE_boot, 1, sd, na.rm = TRUE)))),
                   apply(ATE_boot, 1, mean, na.rm = TRUE) - 1.96*apply(ATE_boot, 1, sd, na.rm = TRUE),
                   apply(ATE_boot, 1, mean, na.rm = TRUE) + 1.96*apply(ATE_boot, 1, sd, na.rm = TRUE))
  ACTE_view_sum <- cbind(apply(ACTE_view_boot, 1, mean, na.rm = TRUE), apply(ACTE_view_boot, 1, sd, na.rm = TRUE),
                         2*(1 - pnorm(abs(apply(ACTE_view_boot, 1, mean, na.rm = TRUE)/apply(ACTE_view_boot, 1, sd, na.rm = TRUE)))),
                         apply(ACTE_view_boot, 1, mean, na.rm = TRUE) - 1.96*apply(ACTE_view_boot, 1, sd, na.rm = TRUE),
                         apply(ACTE_view_boot, 1, mean, na.rm = TRUE) + 1.96*apply(ACTE_view_boot, 1, sd, na.rm = TRUE))
  ACTE_non_view_sum <- cbind(apply(ACTE_non_view_boot, 1, mean, na.rm = TRUE), apply(ACTE_non_view_boot, 1, sd, na.rm = TRUE),
                             2*(1 - pnorm(abs(apply(ACTE_non_view_boot, 1, mean, na.rm = TRUE)/apply(ACTE_non_view_boot, 1, sd, na.rm = TRUE)))),
                             apply(ACTE_non_view_boot, 1, mean, na.rm = TRUE) - 1.96*apply(ACTE_non_view_boot, 1, sd, na.rm = TRUE),
                             apply(ACTE_non_view_boot, 1, mean, na.rm = TRUE) + 1.96*apply(ACTE_non_view_boot, 1, sd, na.rm = TRUE))
  colnames(ATE_sum) <- colnames(ACTE_view_sum) <- colnames(ACTE_non_view_sum) <- c("Estimate", "Std. Error", "Pr(>|t|)",   "CI Lower",   "CI Upper")
  rownames(ACTE_view_sum) <- rownames(ACTE_non_view_sum) <- paste0("C = ", setdiff(sort(unique(data_d0[, C])), placebo_level))
  rownames(ATE_sum) <- rownames(res_boot[[b]]$ATE)
  
  out <- list("ATE" = ATE_sum, "ACTE_view" = ACTE_view_sum, "ACTE_non_view" = ACTE_non_view_sum,
              "ATE_boot" = ATE_boot, 
              "ACTE_view_boot" = ACTE_view_boot, 
              "ACTE_non_view_boot" = ACTE_non_view_boot)
  return(out)
}

# I can do the bootstrap later.
test_pica2 <- function(Y, D, A, C, placebo, placebo_level, data){
  
  # Forced Choice group
  data_d1   <- data[data[, D] == 1, ]
  data_d1$Y <- data_d1[,Y]
  data_d1$A <- data_d1[, A]
  mean_Y_pl <- mean(data_d1$Y[data_d1$A == placebo_level], na.rm = TRUE)
  n_Y_pl    <- length(na.omit(data_d1$Y[data_d1$A == placebo_level]))
  var_Y_pl  <- var(data_d1$Y[data_d1$A == placebo_level], na.rm = TRUE)/n_Y_pl
  
  # Free Choice group
  data_d0   <- data[data[, D] == 0, ]
  data_d0$Y <- data_d0[,Y]
  C_list    <- setdiff(sort(unique(data_d0[, C])), placebo_level)
  prop_free_pl <- mean_Y_free_pl <- var_Y_free_pl <- c()
  for(i in 1:length(C_list)){
    data_d0_use <- data_d0[data_d0[, C] == C_list[i], ]
    prop_free_pl[i]   <- mean(data_d0[, C] == C_list[i], na.rm = TRUE)
    mean_Y_free_pl[i] <- mean(data_d0_use$Y[data_d0_use$pl == 1], na.rm = TRUE)
    n_Y_pl0    <- length(na.omit(data_d0_use$Y[data_d0_use$pl == 1]))
    var_Y_free_pl[i]  <- var(data_d0_use$Y[data_d0_use$pl == 1], na.rm = TRUE)/n_Y_pl0
  }
  ## Add placebo_level
  prop_free_pl   <- c(prop_free_pl, mean(data_d0[, C] == placebo_level, na.rm = TRUE))
  mean_Y_free_pl <- c(mean_Y_free_pl, mean(data_d0$Y[data_d0[, C] == placebo_level], na.rm = TRUE))
  n_use <- length(na.omit(data_d0$Y[data_d0[, C] == placebo_level]))
  var_Y_free_pl <- c(var_Y_free_pl, var(data_d0$Y[data_d0[, C] == placebo_level], na.rm = TRUE)/n_use)
  names(prop_free_pl) <- names(mean_Y_free_pl) <- paste0("C=", c(C_list, placebo_level))
  
  est_diff <- mean_Y_pl - sum(mean_Y_free_pl*prop_free_pl)
  se_diff  <- sqrt(var_Y_pl + sum(prop_free_pl^2*var_Y_free_pl))
  
  out <- c(mean_Y_pl, sum(mean_Y_free_pl*prop_free_pl),
           est_diff,
           se_diff,
           2*(1 - pnorm(abs(est_diff/se_diff))))
  
  names(out) <- c("Forced", "Free", "diff", "diff_SE", "p-value")
  return(out)
}

test_pica2_boot <- function(Y, D, A, C, placebo, placebo_level, data, stratify = NULL, boot_num = 100, seed = 1234){
  
  diff_boot <- c()
  set.seed(seed)
  for(b in 1:boot_num){
    if(is.null(stratify) == FALSE){
      stratify_list  <- unique(data[, stratify])
      data_base_boot <- list()
      for(i in 1:length(stratify_list)){
        data_base <- data[data[,stratify] == stratify_list[i], , drop = FALSE]
        boot_id   <- sample(x = seq(1:nrow(data_base)), size = nrow(data_base), replace = TRUE)
        data_base_boot[[i]] <- data_base[boot_id, , drop = FALSE]
      }
      data_boot <- do.call("rbind", data_base_boot)
    }else{
      boot_id   <- sample(x = seq(1:nrow(data)), size = nrow(data), replace = TRUE)
      data_boot <- data[boot_id, , drop = FALSE]
    }
    diff_boot[b] <- test_pica2_boot_base(Y = Y, D = D, A = A, C = C, placebo = placebo, placebo_level = placebo_level,
                                         data = data_boot)
  }
  
  est_diff <- mean(diff_boot)
  se_diff <- sd(diff_boot)
  pvalue  <- 2*(1 - pnorm(abs(est_diff/se_diff)))
  
  
  out <- c("diff" = est_diff, "diff_SE" = se_diff, "p-value" = pvalue)
  return(out)
}


test_pica2_boot_base <- function(Y, D, A, C, placebo, placebo_level, data){
  
  # Forced Choice group
  data_d1   <- data[data[, D] == 1, ]
  data_d1$Y <- data_d1[,Y]
  data_d1$A <- data_d1[, A]
  mean_Y_pl <- mean(data_d1$Y[data_d1$A == placebo_level], na.rm = TRUE)
  n_Y_pl    <- length(na.omit(data_d1$Y[data_d1$A == placebo_level]))
  var_Y_pl  <- var(data_d1$Y[data_d1$A == placebo_level], na.rm = TRUE)/n_Y_pl
  
  # Free Choice group
  data_d0   <- data[data[, D] == 0, ]
  data_d0$Y <- data_d0[,Y]
  C_list    <- setdiff(sort(unique(data_d0[, C])), placebo_level)
  prop_free_pl <- mean_Y_free_pl <- var_Y_free_pl <- c()
  for(i in 1:length(C_list)){
    data_d0_use <- data_d0[data_d0[, C] == C_list[i], ]
    prop_free_pl[i]   <- mean(data_d0[, C] == C_list[i], na.rm = TRUE)
    mean_Y_free_pl[i] <- mean(data_d0_use$Y[data_d0_use$pl == 1], na.rm = TRUE)
    n_Y_pl0    <- length(na.omit(data_d0_use$Y[data_d0_use$pl == 1]))
    var_Y_free_pl[i]  <- var(data_d0_use$Y[data_d0_use$pl == 1], na.rm = TRUE)/n_Y_pl0
  }
  ## Add placebo_level
  prop_free_pl   <- c(prop_free_pl, mean(data_d0[, C] == placebo_level, na.rm = TRUE))
  mean_Y_free_pl <- c(mean_Y_free_pl, mean(data_d0$Y[data_d0[, C] == placebo_level], na.rm = TRUE))
  n_use <- length(na.omit(data_d0$Y[data_d0[, C] == placebo_level]))
  var_Y_free_pl <- c(var_Y_free_pl, var(data_d0$Y[data_d0[, C] == placebo_level], na.rm = TRUE)/n_use)
  names(prop_free_pl) <- names(mean_Y_free_pl) <- paste0("C=", c(C_list, placebo_level))
  
  est_diff <- mean_Y_pl - sum(mean_Y_free_pl*prop_free_pl)
  return(est_diff)
}
